Library loading and set up

suppressMessages(
  c(library(ggplot2),
    library(mixOmics),
    library(gplots),
    library(RColorBrewer),
    library(readr),
    library(dplyr),
    library(tidyverse),
    library(knitr),
    library(rafalib),
    library(reshape),
    library(clusterProfiler),
    library(org.Mm.eg.db),
    library(AnnotationDbi))
)
##   [1] "ggplot2"         "stats"           "graphics"        "grDevices"      
##   [5] "utils"           "datasets"        "methods"         "base"           
##   [9] "mixOmics"        "lattice"         "MASS"            "ggplot2"        
##  [13] "stats"           "graphics"        "grDevices"       "utils"          
##  [17] "datasets"        "methods"         "base"            "gplots"         
##  [21] "mixOmics"        "lattice"         "MASS"            "ggplot2"        
##  [25] "stats"           "graphics"        "grDevices"       "utils"          
##  [29] "datasets"        "methods"         "base"            "RColorBrewer"   
##  [33] "gplots"          "mixOmics"        "lattice"         "MASS"           
##  [37] "ggplot2"         "stats"           "graphics"        "grDevices"      
##  [41] "utils"           "datasets"        "methods"         "base"           
##  [45] "readr"           "RColorBrewer"    "gplots"          "mixOmics"       
##  [49] "lattice"         "MASS"            "ggplot2"         "stats"          
##  [53] "graphics"        "grDevices"       "utils"           "datasets"       
##  [57] "methods"         "base"            "dplyr"           "readr"          
##  [61] "RColorBrewer"    "gplots"          "mixOmics"        "lattice"        
##  [65] "MASS"            "ggplot2"         "stats"           "graphics"       
##  [69] "grDevices"       "utils"           "datasets"        "methods"        
##  [73] "base"            "forcats"         "stringr"         "purrr"          
##  [77] "tidyr"           "tibble"          "tidyverse"       "dplyr"          
##  [81] "readr"           "RColorBrewer"    "gplots"          "mixOmics"       
##  [85] "lattice"         "MASS"            "ggplot2"         "stats"          
##  [89] "graphics"        "grDevices"       "utils"           "datasets"       
##  [93] "methods"         "base"            "knitr"           "forcats"        
##  [97] "stringr"         "purrr"           "tidyr"           "tibble"         
## [101] "tidyverse"       "dplyr"           "readr"           "RColorBrewer"   
## [105] "gplots"          "mixOmics"        "lattice"         "MASS"           
## [109] "ggplot2"         "stats"           "graphics"        "grDevices"      
## [113] "utils"           "datasets"        "methods"         "base"           
## [117] "rafalib"         "knitr"           "forcats"         "stringr"        
## [121] "purrr"           "tidyr"           "tibble"          "tidyverse"      
## [125] "dplyr"           "readr"           "RColorBrewer"    "gplots"         
## [129] "mixOmics"        "lattice"         "MASS"            "ggplot2"        
## [133] "stats"           "graphics"        "grDevices"       "utils"          
## [137] "datasets"        "methods"         "base"            "reshape"        
## [141] "rafalib"         "knitr"           "forcats"         "stringr"        
## [145] "purrr"           "tidyr"           "tibble"          "tidyverse"      
## [149] "dplyr"           "readr"           "RColorBrewer"    "gplots"         
## [153] "mixOmics"        "lattice"         "MASS"            "ggplot2"        
## [157] "stats"           "graphics"        "grDevices"       "utils"          
## [161] "datasets"        "methods"         "base"            "clusterProfiler"
## [165] "reshape"         "rafalib"         "knitr"           "forcats"        
## [169] "stringr"         "purrr"           "tidyr"           "tibble"         
## [173] "tidyverse"       "dplyr"           "readr"           "RColorBrewer"   
## [177] "gplots"          "mixOmics"        "lattice"         "MASS"           
## [181] "ggplot2"         "stats"           "graphics"        "grDevices"      
## [185] "utils"           "datasets"        "methods"         "base"           
## [189] "org.Mm.eg.db"    "AnnotationDbi"   "IRanges"         "S4Vectors"      
## [193] "Biobase"         "BiocGenerics"    "parallel"        "stats4"         
## [197] "clusterProfiler" "reshape"         "rafalib"         "knitr"          
## [201] "forcats"         "stringr"         "purrr"           "tidyr"          
## [205] "tibble"          "tidyverse"       "dplyr"           "readr"          
## [209] "RColorBrewer"    "gplots"          "mixOmics"        "lattice"        
## [213] "MASS"            "ggplot2"         "stats"           "graphics"       
## [217] "grDevices"       "utils"           "datasets"        "methods"        
## [221] "base"            "org.Mm.eg.db"    "AnnotationDbi"   "IRanges"        
## [225] "S4Vectors"       "Biobase"         "BiocGenerics"    "parallel"       
## [229] "stats4"          "clusterProfiler" "reshape"         "rafalib"        
## [233] "knitr"           "forcats"         "stringr"         "purrr"          
## [237] "tidyr"           "tibble"          "tidyverse"       "dplyr"          
## [241] "readr"           "RColorBrewer"    "gplots"          "mixOmics"       
## [245] "lattice"         "MASS"            "ggplot2"         "stats"          
## [249] "graphics"        "grDevices"       "utils"           "datasets"       
## [253] "methods"         "base"

Select genes for heatmap

We will select the top 5000 genes with the highest variances.

VST_TNF <- read_csv("VST_TNF.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character()
## )
## See spec(...) for full column specifications.
dat <- as.data.frame(VST_TNF[, c(18:22, 8:12, 2:7)])
rownames(dat) <- VST_TNF$X1
# calculate row variance
row_var <- apply(dat, 1, var)
filter_dat <- dat[order(-row_var)[1:5000],]

# Plot the heatmap
suppressMessages(library(mosaic))

for (i in 1:dim(filter_dat)[1]) {
  filter_dat[i,1:16] <- zscore(as.numeric(filter_dat[i,1:16]))
}

my_palette <- colorRampPalette(c("blue", "white", "red"))(128)
heatmap_matrix <- as.matrix(filter_dat[,1:16])

png('Top 5000 variance.png',
    width = 1000,
    height = 1200,
    res = 200,
    pointsize = 8)
heatmap.2(heatmap_matrix,
          main = "Top 5000 variance",
          density.info = "none",
          key = TRUE,
          lhei = c(1,7),
          col=my_palette,
          cexCol = 1,
          margins = c(8,2),
          trace = "none",
          dendrogram = "row",
          labRow = FALSE,
          keysize = 2,
          ylab = "Genes",
          Colv = "NA")
dev.off()
## quartz_off_screen 
##                 2
pdf('Top 5000 variance.pdf',
    width = 8,
    height = 10)
heatmap.2(heatmap_matrix,
          main = "Top 5000 variance",
          density.info = "none",
          key = TRUE,
          lhei = c(1,7),
          col=my_palette,
          cexCol = 1,
          margins = c(8,2),
          trace = "none",
          dendrogram = "row",
          labRow = FALSE,
          keysize = 2,
          ylab = "Genes",
          Colv = "NA")
dev.off()
## quartz_off_screen 
##                 2
include_graphics('Top 5000 variance.png')

Examine the clustering of these genes

row_clust <- hclust(dist(filter_dat, method = 'euclidean'), method = 'ward.D2')
plot(row_clust, labels = FALSE)
abline(h = 34, col = "red")

# seperate clusters
h = 34
clusters <- cutree(row_clust, h = h)

# number of distinct cluster we get 
length(unique(clusters))
## [1] 11
# generate of list of genes for the 10 clusters
genecluster <- function(n) {
  index <- which(clusters == n)
  row_clust$labels[index]
}

cluster_gene <- lapply(1:length(unique(clusters)), genecluster)
names(cluster_gene) <- paste("cluster", c(1:length(unique(clusters))))

Now we need to make a function that would plot a boxplot for all the genes in each cluster.

cluster_boxplot <- function(a) {
  # plot the boxplots
  gene_list <- cluster_gene[[a]]
  cluster_data <- filter_dat[gene_list,]
  suppressMessages(df <- melt(cluster_data, variable_name = "Samples"))
  df <- data.frame(df, Condition = substr(df$Samples,1,4))
  ggplot(df, aes(x=Samples, y=value, fill = Condition)) + geom_boxplot() + xlab("") + 
  ylab("Z-scored gene expression") + scale_fill_manual(values = c("#619CFF", "#F564E3", "#E69F00")) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ggtitle(names(cluster_gene)[a])
}

lapply(1:11, cluster_boxplot)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

cluster_heatmap <- function(a) {
  gene_list <- cluster_gene[[a]]
  # plot the heatmap
  heatmap_matrix <- as.matrix(filter_dat[gene_list,])
  png(paste(names(cluster_gene)[a], "heatmap.png"),
      width = 800,
      height = 1000,
      res = 150,
      pointsize = 8)
  heatmap.2(heatmap_matrix,
            main = paste(names(cluster_gene)[a], "heatmap"),
            density.info = "none",
            key = TRUE,
            lhei = c(1,7),
            col=my_palette,
            cexCol = 1,
            margins = c(8,2),
            trace = "none",
            dendrogram = "row",
            labRow = FALSE,
            keysize = 2,
            ylab = "Genes",
            Colv = "NA")
  dev.off()
}

lapply(1:11, cluster_heatmap)
## [[1]]
## quartz_off_screen 
##                 2 
## 
## [[2]]
## quartz_off_screen 
##                 2 
## 
## [[3]]
## quartz_off_screen 
##                 2 
## 
## [[4]]
## quartz_off_screen 
##                 2 
## 
## [[5]]
## quartz_off_screen 
##                 2 
## 
## [[6]]
## quartz_off_screen 
##                 2 
## 
## [[7]]
## quartz_off_screen 
##                 2 
## 
## [[8]]
## quartz_off_screen 
##                 2 
## 
## [[9]]
## quartz_off_screen 
##                 2 
## 
## [[10]]
## quartz_off_screen 
##                 2 
## 
## [[11]]
## quartz_off_screen 
##                 2
include_graphics("cluster 1 heatmap.png")

include_graphics("cluster 2 heatmap.png")

include_graphics("cluster 3 heatmap.png")

include_graphics("cluster 4 heatmap.png")

include_graphics("cluster 5 heatmap.png")

include_graphics("cluster 6 heatmap.png")

include_graphics("cluster 7 heatmap.png")

include_graphics("cluster 8 heatmap.png")

include_graphics("cluster 9 heatmap.png")

include_graphics("cluster 10 heatmap.png")

include_graphics("cluster 11 heatmap.png")

KEGG analysis for interesting clusters

Based on the previous analysis, cluster 4 and 11 are selected as being inflammation related. Cluster 7 and 8 are selected as MK2 signature clusters.

# Convert Ensembl ID to Entrez ID
detected_gene <- rownames(filter_dat)
annotations_entrez <- AnnotationDbi::select(org.Mm.eg.db,
                                           keys = detected_gene,
                                           columns = c("ENTREZID"),
                                           keytype = "ENSEMBL")
## 'select()' returned 1:many mapping between keys and columns
detected_gene_entrez <- as.character(unique(annotations_entrez$ENTREZID[!is.na(annotations_entrez$ENTREZID)]))


## for each cluster
lapply(c(4,11,7,8), function(i) {
  target_gene <- cluster_gene[[i]]
  annotations_entrez <- AnnotationDbi::select(org.Mm.eg.db,
                                           keys = target_gene,
                                           columns = c("ENTREZID"),
                                           keytype = "ENSEMBL")

  target_gene_entrez <- as.character(unique(annotations_entrez$ENTREZID[!is.na(annotations_entrez$ENTREZID)]))


  kk <- enrichKEGG(gene = target_gene_entrez, universe = detected_gene_entrez, organism = 'mmu')
  write.csv(as.data.frame(kk), paste("KEGG analysis/KEGG_cluster ", i, ".csv", sep = ""))
  dotplot(kk, showCategory = 20) + labs(title = paste("KEGG for genes in", names(cluster_gene)[i]))
})
## 'select()' returned 1:many mapping between keys and columns
## Reading KEGG annotation online:
## 
## Reading KEGG annotation online:
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]